In this dashboard, we summarize the information from the data provided by Ohio Department of Health.
In this data set, there are 8 variables.
Today: ‘April 04, 2020’
The latest onset date is April 04, 2020.
We excluded 4 people whose age is unknown.
---
title: "Ohio COVID-19"
author: "Ying-Ju Tessa Chen"
output:
flexdashboard::flex_dashboard:
theme: journal
orientation: columns
social: ["facebook", "twitter", "linkedin"]
source_code: embed
---
```{r setup, include=FALSE}
library(flexdashboard) ## you need this package to create dashboard
```
Basic Information
=======================================================================
Column {data-width=400}
---
### Introduction
In this dashboard, we summarize the information from the data provided by Ohio Department of Health.
In this data set, there are 8 variables.
- **County**: 88 counties
- **Sex**: Female, Male, Unknown
- **Age Range**: 0-19, 20-29, 30-39, 40-49, 50-59, 60-69, 70-79, 80+, Unknown
- **Onset Data**
- **Date of Death**
- **Case Count**
- **Death Count**
- **Hospitalized Count**
### Data Preparation
1. Load the necessary packages
2. Read the data set from
3. Replace the space in each column name by an underscore
4. Remove the last row in the data table that shows the total count
```{r}
# load necessary packages
library(data.table)
library(ggplot2)
library(plotly)
library(plyr)
library(chron)
library(Hmisc)
```
```{r}
df <- fread("https://coronavirus.ohio.gov/static/COVIDSummaryData.csv")
colnames(df) <- c("County", "Sex", "Age_Range", "Onset_Date",
"Date_Of_Death", "Case_Count",
"Death_Count", "Hospitalized_Count")
# remove the last row that shows the total count and make sure the type of each variable is correct
df <- as.data.frame(df[1:(nrow(df)-1),])
df[,1:3] <- lapply(df[,1:3], factor)
df[,4:5] <- lapply(df[,4:5], function(x) as.Date(x, "%m/%d/%Y"))
df[,6:8] <- lapply(df[,6:8], as.numeric)
```
Column {data-width=600}
---
```{r}
all_dates <- names(table(df$Onset_Date))
latest_date <- sort(df$Onset_Date, decreasing = TRUE)[1]
```
### Summary Statistics
**Today: '`r format(Sys.Date(), "%B %d, %Y")`'**
**The latest onset date is `r format(latest_date, "%B %d, %Y")`.**
- Total Number of **Confirmed Cases**: `r sum(df$Case_Count)`
- Total Number of **Hospitalizations**: `r sum(df$Hospitalized_Count)`
- Total Number of **Deaths**: `r sum(df$Death_Count)`
### Age Distribution
```{r}
AGE_summary <- table(df$Age_Range)
AGE_count <- as.vector(unname(AGE_summary))
AGE <- data.frame(age=AGE_count, percent=paste0(round(AGE_count/sum(AGE_count)*100, 2), "%"))
rownames(AGE) <- names(AGE_summary)
colnames(AGE) <- c("Count", "Percent")
DT::datatable(t(AGE), options = list(
columnDefs = list(list(className = 'dt-center', targets = 0:nrow(AGE)))
))
```
### Sex Distribution
```{r}
Sex_summary <- table(df$Sex)
Sex_count <- as.vector(unname(Sex_summary))
SEX <- data.frame(sex=Sex_count, percent=paste0(round(Sex_count/sum(Sex_count)*100, 2), "%"))
rownames(SEX) <- names(Sex_summary)
colnames(SEX) <- c("Count", "Percent")
DT::datatable(t(SEX), options = list(
columnDefs = list(list(className = 'dt-center', targets = 0:nrow(SEX)))
))
```
Daily Cases
=======================================================================
Column {.tabset data-width=500}
-----------------------------------------------------------------------
```{r}
date_sum <- table(df$Onset_Date, df$Case_Count)
daily_cases <- apply(date_sum, 1, function(x) sum(x*as.numeric(colnames(date_sum))))
monthly <- data.frame(dates=as.Date(all_dates, "%Y-%m-%d"), cases=daily_cases)
rownames(monthly) <- c()
cal <- function(month, year) {
if(missing(year) && missing(month)) {
tmp <- month.day.year(Sys.Date())
year <- tmp$year
month <- tmp$month
}
if(missing(year) || missing(month)){ # year calendar
if(missing(year)) year <- month
par(mfrow=c(4,3))
tmp <- seq.dates( from=julian(1,1,year), to=julian(12,31,year) )
tmp2 <- month.day.year(tmp)
wd <- do.call(day.of.week, tmp2)
par(mar=c(1.5,1.5,2.5,1.5))
for(i in 1:12){
w <- tmp2$month == i
cs <- cumsum(wd[w]==0)
if(cs[1] > 0) cs <- cs - 1
nr <- max( cs ) + 1
plot.new()
plot.window( xlim=c(0,6), ylim=c(0,nr+1) )
text( wd[w], nr - cs -0.5 , tmp2$day[w] )
title( main=month.name[i] )
text( 0:6, nr+0.5, c('S','M','T','W','T','F','S') )
}
} else { # month calendar
ld <- seq.dates( from=julian(month,1,year), length=2, by='months')[2]-1
days <- seq.dates( from=julian(month,1,year), to=ld)
tmp <- month.day.year(days)
wd <- do.call(day.of.week, tmp)
cs <- cumsum(wd == 0)
if(cs[1] > 0) cs <- cs - 1
nr <- max(cs) + 1
par(oma=c(0.1,0.1,4.6,0.1))
par(mfrow=c(nr,7))
par(mar=c(0,0,0,0))
for(i in seq_len(wd[1])){
plot.new()
#box()
}
day.name <- c('Sun','Mon','Tues','Wed','Thur','Fri','Sat')
for(i in tmp$day){
plot.new()
box()
text(0,1, i, adj=c(0,1))
if(i < 8) mtext( day.name[wd[i]+1], line=0.5,
at=grconvertX(0.5,to='ndc'), outer=TRUE )
}
mtext(month.name[month], line=2.5, at=0.5, cex=1.75, outer=TRUE)
#box('inner') #optional
}
}
week_days <- function(x){
days <- c(1:7)
names(days) <- c("Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday")
days_index <- which(names(days)==x)
return(unname(days[days_index]))
}
```
```{r , message=FALSE, echo=FALSE, cache=TRUE, error=FALSE, results='asis'}
for (i in month(latest_date):2){
df_m <- monthly[which(month(monthly$dates)==i),]
first_day <- weekdays(as.Date(paste0("2020-", i, "-01"), "%Y-%m-%d"))
C_matrix <- matrix(NA, ncol=3, nrow=monthDays(as.Date(paste0("2020-", i, "-01"))))
total_days <- week_days(first_day):(week_days(first_day)+monthDays(as.Date(paste0("2020-", i, "-01")))-1)
C_matrix[,1] <- ceiling(total_days/7)
C_matrix[,2] <- total_days%%7
C_matrix[,2] <- ifelse(C_matrix[,2]==0, 7, C_matrix[,2])
for (j in 1:nrow(df_m)){
C_matrix[mday(df_m$dates[j]),3] <- df_m$cases[j]
}
cat('### ', month.abb[i],' \n')
cal(i, 2020)
for (k in mday(df_m$dates)){
par(mfg=C_matrix[k,1:2])
text(.5, .5, as.character(C_matrix[k,3]), cex=2)
}
cat('\n \n')
}
```
Column {.tabset data-width=500}
-----------------------------------------------------------------------
### Distribution of Daily Cases
```{r}
D <- data.frame(Dates=names(daily_cases), cases=unname(daily_cases))
p_dates <- plot_ly(D, x=~Dates, y=~cases, type="bar", text=as.character(cumsum(daily_cases)), name="",
hovertemplate = paste('%{x}', '
Daily Cases: %{y:s}
',
'Total Cases: %{text:s}'))
p_dates <- p_dates %>% layout(uniformtext=list(minsize=8,mode='hide')) %>% config(displayModeBar = F)
p_dates
```
Advance Information
=======================================================================
Column {data-width=500}
---
### Distribution of Confirmed Cases by the Age Range
**We excluded `r length(which(df$Age_Range=="Unknown"))` people whose age is unknown.**
\
```{r}
# remove the cases for which the age range is "Unknown"
if (length(which(df$Age_Range=="Unknown"))==0){
df1 <- df
}else{
df1 <- df[-which(df$Age_Range=="Unknown"),]
}
df1$Age_Range <- factor(df1$Age_Range)
# find counts and relative counts (%) in each age range
Age_Dist <- table(df1$Age_Range, df1$Case_Count)
n <- sum(apply(Age_Dist, 1, function(x) sum(x*as.numeric(colnames(Age_Dist)))))
Age_Percent <- round(apply(Age_Dist, 1, function(x) sum(x*as.numeric(colnames(Age_Dist))))/n*100,2)
# form a data frame for the summary information of AGE
df_age <- data.frame(Age_Range=levels(df1$Age_Range), Percent_Cases=Age_Percent, text1=paste0(Age_Percent, "%"))
# obtatin the bar chart for the distribution of Ohio's confirmed cases by the Age Range
p_age <- plot_ly(df_age, x=~Age_Range, y=~Percent_Cases, type="bar",
text = df_age$text1, textposition = 'outside')%>% config(displayModeBar = F)
p_age <- p_age %>% layout(title="Ages of Ohio's Confirmed Cases", xaxis=list(title="Age Range"), yaxis=list(title="Percent of Cases"))
p_age %>% layout(autosize = F, width = 600, height = 600)
```
Column {data-width=500}
---